#encoding=utf8
import jieba.posseg as posseg
import ClearStopWord

def word_property_capture(str_txt,prop_list):
	"""Collect the distinct words of ``str_txt`` whose POS tag is in ``prop_list``.

	The text is segmented and tagged with ``jieba.posseg.cut``. A word is kept
	only when it is not in the stop-word collection returned by
	``ClearStopWord.load_stop_wordlist()`` and its tag is in ``prop_list``.

	Args:
		str_txt: Text to segment and tag.
		prop_list: Container of POS tags to keep, e.g. ``['nr', 'ns']``.

	Returns:
		A list of ``"word:tag"`` strings, in order of first appearance, with
		duplicates removed.
	"""
	# Load the stop words.
	# NOTE(review): membership cost depends on the type load_stop_wordlist()
	# returns; if it is a list, returning a set there would speed up the
	# filter below — confirm in ClearStopWord.
	stop_words = ClearStopWord.load_stop_wordlist()

	res_arr = []
	# Companion set for O(1) duplicate checks; res_arr alone keeps the order.
	seen = set()

	for word,prop in posseg.cut(str_txt):
		if word in stop_words or prop not in prop_list:
			continue
		temps = word + ':' + prop
		if temps not in seen:
			seen.add(temps)
			res_arr.append(temps)

	return res_arr

if __name__ == '__main__':
	# Demo run: extract tagged words from one sample news article.
	# Tags to keep. NOTE(review): presumably jieba's proper-noun tags
	# (person, organization, other proper noun, place) — confirm against
	# jieba's POS tag table.
	wanted_tags = ['nr','nt','nz','ns']
	sample_text = ClearStopWord.read_file(r'../CSCMNews/体育/0.txt')
	captured = word_property_capture(sample_text,wanted_tags)
	print(captured)